library(NbClust)
library(fastcluster)
library(FactoMineR)
source("longTAPIO.R")
source("longTAPIO_try.R")
source("TAPIO.R")
source("calc_SIL.R")
source("association.R")
# Statistical mode: return the value that occurs most often in `x`.
# When several values share the highest count, the one that appears first
# in `x` is returned (which.max() picks the first maximum).
Mode <- function(x) {
  distinct_vals <- unique(x)
  # Position of every element of x within its distinct values ...
  positions <- match(x, distinct_vals)
  # ... and how many times each distinct value occurs.
  counts <- tabulate(positions)
  distinct_vals[which.max(counts)]
}
# ---- Simulated example: three trajectory clusters ----
# Fix the RNG state so the simulated data are reproducible.
set.seed(123)

# Three groups of 50 subjects with mean trajectories 0, -t and t.
# The residual function draws a single N(0, sd = 0.35) value per call.
ex2 <- kml::generateArtificialLongData(
  meanTrajectories = list(
    function(t) 0,
    function(t) -t,
    function(t) t
  ),
  nbEachClusters = c(50, 50, 50),
  residualVariation = function(t) {
    rnorm(1, 0, 0.35)
  }
)

# Ground-truth labels: 50 x cluster 1, then 50 x cluster 2, then 50 x cluster 3.
# NOTE(review): assumes the generator emits subjects cluster by cluster in the
# order of nbEachClusters -- confirm against the kml documentation.
trueClusIDs <- rep(1:3, each = 50)
# plot(ex2, parTraj = parTRAJ(col = rep(2:4, each = 50)))

# Trajectory matrix -- columns: time, rows: subjects.
x <- attr(ex2, "traj")

# One subject id per measurement: each row index repeated once per time point.
user_ids <- rep(seq_len(nrow(x)), each = ncol(x))

# Draw one line per subject, coloured by its true cluster.
matplot(t(x), type = "l", lty = 1, col = trueClusIDs)
grid()
# Cluster the simulated trajectories with longTAPIO_try().
# as.vector(t(x)) flattens the trajectory matrix subject by subject (all time
# points of subject 1, then subject 2, ...), so the rows of the one-column
# matrix line up with user_ids.
res <- longTAPIO_try(
  matrix(as.vector(t(x)), ncol = 1),
  k = 3,
  user_id = user_ids,
  levels = 3,
  verbose = 1
)
## data dimension: 1650 1

foundClusIDs <- res$cl

# Confusion matrix: found vs. true cluster labels.
table(foundClusIDs, trueClusIDs)
# Recorded output (re-aligned for readability):
##             trueClusIDs
## foundClusIDs  1  2  3
##   1          50  0  0
##   2           0 50  0
##   3           0  0 50
# Compare to row sampling (longTAPIO):
# same flattened one-column data, but the subject ids are supplied as row names
# instead of through a user_id argument.
DATA <- matrix(as.vector(t(x)), ncol = 1)
rownames(DATA) <- user_ids

res2 <- longTAPIO(DATA, k = 3, levels = 3)
foundClusIDs <- res2$cl

# Confusion matrix. Cluster numbering is arbitrary, so agreement has to be
# judged up to a relabelling of the found clusters.
table(foundClusIDs, trueClusIDs)
# Recorded output (re-aligned for readability):
##             trueClusIDs
## foundClusIDs  1  2  3
##   1          47  0  0
##   2           3  0 50
##   3           0 50  0
# Run longTAPIO_try() on D_norm, using its row names as subject ids.
# NOTE(review): neither `D_norm` nor `outcome` is created anywhere in the
# visible part of this script -- they must already exist in the workspace
# (presumably from a data-loading step that is not shown); confirm.
res <- longTAPIO_try(
  D_norm,
  user_id = rownames(D_norm),
  k = 3,
  n_trees = 1000,
  levels = 3
)

# True label per subject: the most frequent value of as.numeric(outcome) among
# the rows that share a row name.
trueClusIDs <- aggregate(
  as.numeric(outcome),
  by = list(rownames(D_norm)),
  FUN = Mode
)[, 2]
foundClusIDs <- res$cl

# Confusion matrix. Cluster numbering is arbitrary, so agreement has to be
# judged up to a relabelling of the found clusters.
table(foundClusIDs, trueClusIDs)
# Recorded output (re-aligned for readability):
##             trueClusIDs
## foundClusIDs 1 2 3
##   1          3 0 0
##   2          0 0 3
##   3          0 4 0
# Sample size seems too low for any conclusion.
# Same data clustered with longTAPIO(); subject ids are taken from the row
# names of D_norm rather than passed explicitly.
# NOTE(review): `D_norm` and `outcome` are not created in the visible part of
# this script -- they must already exist in the workspace; confirm.
res <- longTAPIO(
  D_norm,
  k = 3,
  n_trees = 1000,
  levels = 3
)

# True label per subject: the most frequent value of as.numeric(outcome) among
# the rows that share a row name.
trueClusIDs <- aggregate(
  as.numeric(outcome),
  by = list(rownames(D_norm)),
  FUN = Mode
)[, 2]
foundClusIDs <- res$cl

# Confusion matrix. Cluster numbering is arbitrary, so agreement has to be
# judged up to a relabelling of the found clusters.
table(foundClusIDs, trueClusIDs)
# Recorded output (re-aligned for readability):
##             trueClusIDs
## foundClusIDs 1 2 3
##   1          2 0 1
##   2          1 3 0
##   3          0 1 2
# Sample size seems too low for any conclusion.
# Load data/Longdat_clusters.rda and plot it with clusterMLD::MeanPlot().
# NOTE(review): `output` is not assigned anywhere in this script; presumably it
# is one of the objects restored by load() -- confirm.
p <- "data/"
load(file = paste0(p, "Longdat_clusters.rda"))
clusterMLD::MeanPlot(output)
# Interpolated data at fixed times:
# Load the interpolated data set.
# NOTE(review): `y_int` is not assigned in this script; presumably it is
# restored by this load() call -- confirm.
load("data/LongDat_interpolated.rda")

# Cluster on the first five columns of y_int, grouping rows by y_int$id.
res <- longTAPIO_try(
  as.matrix(y_int[, 1:5]),
  k = 4,
  user_id = y_int$id,
  levels = 4,
  verbose = 1
)
## data dimension: 2000 5

# True label per subject: the first `label` value found among that id's rows.
trueClusIDs <- aggregate(
  y_int$label,
  by = list(y_int$id),
  FUN = function(v) v[1]
)[, 2]
foundClusIDs <- res$cl

# Confusion matrix: found vs. true cluster labels.
table(foundClusIDs, trueClusIDs)
# Recorded output (re-aligned for readability):
##             trueClusIDs
## foundClusIDs 1  2  3  4
##   1          5  5  0  0
##   2          0 60  0  0
##   3          0  0 65  0
##   4          0  0  0 65
# Compare to row sampling (longTAPIO):
# same five columns of y_int, with the subject ids supplied as row names
# instead of through a user_id argument.
DATA <- as.matrix(y_int[, 1:5])
rownames(DATA) <- y_int$id

res2 <- longTAPIO(DATA, k = 4, levels = 4)
foundClusIDs <- res2$cl

# Confusion matrix. Cluster numbering is arbitrary, so agreement has to be
# judged up to a relabelling of the found clusters.
table(foundClusIDs, trueClusIDs)
# Recorded output (re-aligned for readability):
##             trueClusIDs
## foundClusIDs 1  2  3  4
##   1          5 20  0  0
##   2          0 45  7  0
##   3          0  0 57  0
##   4          0  0  1 65